#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# generic.py  -  Generic utilities
#
# $Revision: 1.4 $
#
# Copyright (C) 2015 Jan Jockusch <jan.jockusch@perfact.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#
# $Id: generic.py,v 1.4 2015/10/24 13:58:16 perfact Exp $

import sys
import subprocess
import json
# For literal parsing:
import ast
import operator
# For encryption and salting:
import hashlib
import binascii
import base64
import os
# For unique logger IDs
import uuid
# For string replacements
import string
import random
import unicodedata
# For checking IP addresses
import ipaddress
# For validation (validate_port)
import re
# For socket validation
import socket
# For Python 2/3 compatibility
import six
# For markdown conversion
import html2text
import markdown
# For file parsing
import csv
# For yaml interaction
import yaml

# Compatibility shim so the rest of the module can use one set of names on
# both Python 2 and Python 3.
if six.PY2:
    # Python2 backward compatibility: simulate Bytes and NameConstants
    # (literal_eval() below does isinstance() checks against both names).
    # Byte strings are plain ast.Str nodes on Python 2.
    ast.Bytes = ast.Str

    # Placeholder class that no parsed node is ever an instance of; it only
    # exists so that "isinstance(node, ast.NameConstant)" does not raise.
    class DummyNameConstant:
        pass
    ast.NameConstant = DummyNameConstant
else:
    # Python 3 has no builtin "buffer"; alias it to memoryview.
    # NOTE(review): presumably used further down in the file - confirm.
    buffer = memoryview

# For URL previewing
if six.PY2:
    from urllib2 import urlopen
else:
    # Sadly, as seen below, the API is different...
    from urllib.request import urlopen

# HTML entities collections
if six.PY2:
    import htmlentitydefs
else:
    import html.entities as htmlentitydefs

# For html_quote
if six.PY2:
    from cgi import escape as html_escape
else:
    # cgi.escape() is deprecated since Python 3.2
    from html import escape as html_escape

if six.PY2:
    from urlparse import urlparse
else:
    from urllib.parse import urlparse

# for encoding XML 1.0
if six.PY2:
    from xmlrpclib import dumps as xmlrpc_dumps
    from xmlrpclib import loads as xmlrpc_loads
else:
    from xmlrpc.client import dumps as xmlrpc_dumps
    from xmlrpc.client import loads as xmlrpc_loads

if six.PY2:
    import imp
else:
    import importlib

# Other useful imports may include:
# url_quote urlencode url_query restructured_text

# for usage in isinstance() to match for both bytes and unicode
STRING_TYPES = (six.binary_type, six.text_type)


def deep_update(tgt, enhancer):
    """Recursively merge ``enhancer`` into ``tgt`` (in place).

    Adapted from https://stackoverflow.com/a/71396786

    The plain ``dict.update`` only merges on the first level and may
    therefore drop sub-keys. This variant descends into sub-dictionaries:
    when both sides hold a dictionary under the same key, the two are merged
    recursively; in every other case the value from ``enhancer`` replaces
    (or adds) the entry in ``tgt``.

    Returns ``tgt`` itself, after modification.
    """
    for name, incoming in enhancer.items():
        mergeable = (
            name in tgt
            and isinstance(incoming, dict)
            and isinstance(tgt[name], dict)
        )
        if mergeable:
            # Both sides are dictionaries: merge one level deeper.
            deep_update(tgt[name], incoming)
        else:
            # New key, or at least one side is not a dictionary: overwrite.
            tgt[name] = incoming
    return tgt


def url_split(url):
    '''Split a URL into its components and return them as a plain dict.

    This is a thin wrapper around urlparse whose result (a dictionary with
    the keys 'scheme', 'netloc', 'path', 'params', 'query' and 'fragment')
    can be used directly in RestrictedPython.

    >>> (url_split('http://perfact.de/') ==
    ...  {'fragment': '', 'netloc': 'perfact.de', 'params': '', 'query': '',
    ...  'path': '/', 'scheme': 'http'})
    True
    >>> (url_split('brokenproto://host.de:80/junk://another.host.de') ==
    ...  {'fragment': '', 'netloc': 'host.de:80', 'params': '', 'query': '',
    ...   'path': '/junk://another.host.de', 'scheme': 'brokenproto'})
    True
    '''
    parts = urlparse(url)
    return dict(
        scheme=parts.scheme,
        netloc=parts.netloc,
        path=parts.path,
        params=parts.params,
        query=parts.query,
        fragment=parts.fragment,
    )


def html_quote(v):
    '''Escape the HTML special characters in the string "v".

    A smaller implementation of Products.PythonScripts.standard.html_quote
    which, unlike the original, takes exactly one string parameter.

    >>> html_quote('here is <something "dangerous">')
    'here is &lt;something &quot;dangerous&quot;&gt;'

    Note: Python 2 and 3 differ here. The Python 3 escaper also escapes the
    single quote ('), whereas it is left untouched in Python 2.
    '''
    quoted = html_escape(v, quote=True)
    return quoted


def simple_html_unquote(value):
    '''Undo HTML quoting for the four basic entities (minimal version).

    Only "&lt;", "&gt;", "&quot;" and "&amp;" are replaced, in that order,
    so "&amp;" is resolved last.

    >>> simple_html_unquote(u'&lt;title&gt;&quot;Title&quot;'
    ...                     u'&lt;/title&gt;') == u'<title>"Title"</title>'
    True
    '''
    return (
        value
        .replace('&lt;', '<')
        .replace('&gt;', '>')
        .replace('&quot;', '"')
        .replace('&amp;', '&')
    )


def html_unquote(value, protect_tags=False, custom_mapping=None):
    '''Unquote quoted HTML text.

    Parameters:
    - value: the text to unquote (unicode or bytes, see below).
    - protect_tags: if true, "amp", "quot", "lt" and "gt" (and the numeric
      entities for the same characters) are excluded from the list of
      unquotable entities.
    - custom_mapping: optional dictionary which overrides existing mappings.
      The key is the entity name, the value is a code point (integer) or
      None if that name is not to be mapped.

    Returns the unquoted text, in the same kind of string that was passed in.

    Usually, this routine should be called with unicode strings as input.
    >>> (html_unquote(u'Umlauts &auml; mixed with &lt; &amp; ') ==
    ...  u'Umlauts \\xe4 mixed with < & ')
    True

    But, if bytes are passed, they are treated like utf-8 encoded material
    and bytes encoded the same way are returned.
    >>> (to_ustring(html_unquote('Umlauts &auml; mixed with &lt; &amp; '))
    ...  == u'Umlauts \\xe4 mixed with < & ')
    True

    Note how, in the above examples, "amp" and "lt" are protected. If that's
    not what you want, use the following option:
    >>> (html_unquote(u'Umlauts &auml; mixed with &lt; &amp; ',
    ...               protect_tags=True) ==
    ...  u'Umlauts \\xe4 mixed with &lt; &amp; ')
    True
    >>> (html_unquote(u'Umlauts &auml; mixed with &#x3c; &amp; ',
    ...               protect_tags=True) ==
    ...  u'Umlauts \\xe4 mixed with &#x3c; &amp; ')
    True

    All versions of entity encoding are handled by this routine. See three
    versions of the same character here:
    >>> html_unquote(u'&#199; &#xC7; &Ccedil;') == u'\\xc7 \\xc7 \\xc7'
    True

    Numeric entities with a single digit are decoded as well:
    >>> html_unquote(u'tab:&#9;.') == u'tab:\\t.'
    True

    Corrupt entities should remain untouched:
    >>> (html_unquote(u'corrupt: &;, &#; &unknown; &#XY; &/(/') ==
    ...  u'corrupt: &;, &#; &unknown; &#XY; &/(/')
    True

    The same is true for numeric entities that do not denote a valid
    character (negative or too large numbers). Code points beyond U+FFFF
    are converted to the corresponding single character.

    A stray ampersand does not prevent the following entity from being
    recognized:
    >>> html_unquote(u'Q & A &auml;') == u'Q & A \\xe4'
    True

    If you want a different behaviour on some entities, you can add them to
    your custom mapping. This only works for named entities.
    >>> (html_unquote(u'weird: &auml; &ouml; &uuml;',
    ...               custom_mapping={'auml': 0x3a3, 'ouml': None}) ==
    ...  u'weird: \\u03a3 &ouml; \\xfc')
    True

    '''
    def codepoint_to_char(codepoint):
        'Return the character for a code point, or None if there is none.'
        try:
            return six.unichr(codepoint)
        except (ValueError, OverflowError):
            # Negative or out-of-range number: not a character.
            return None

    my_entities = dict(htmlentitydefs.name2codepoint)  # Start with a copy.
    if custom_mapping:
        my_entities.update(custom_mapping)
    excluded_codepoints = []
    if protect_tags:
        # Applied after the custom mapping, so protection always wins.
        my_entities.update({
            'amp': None, 'lt': None, 'gt': None, 'quot': None,
        })
        excluded_codepoints = [0x26, 0x3c, 0x3e, 0x22, ]

    # Translate the code points into characters once, up front. Names mapped
    # to None are dropped, which makes them "unknown" in the lookup below.
    my_entities = {
        name: codepoint_to_char(codepoint)
        for name, codepoint in my_entities.items()
        if codepoint is not None
    }

    out = []
    # Conserve 8-bittiness or unicodeness
    old_type = get_type(value)
    # But work only in unicode
    value = to_ustring(value)
    while value:
        a = value.find('&')
        b = value.find(';', a)
        if a == -1 or b == -1:
            # No complete entity left: keep the rest verbatim.
            out.append(value)
            break
        # Use the last '&' in front of the ';', so that a stray ampersand
        # ("Q & A &auml;") does not swallow the entity that follows it.
        a = value.rfind('&', a, b)
        ent = value[a+1:b]
        out.append(value[:a])

        replacement = None
        if len(ent) >= 2 and ent[0] == '#':
            # numeric entity. Hex or decimal?
            offset, base = (2, 16) if ent[1] == 'x' else (1, 10)
            try:
                codepoint = int(ent[offset:], base)
            except ValueError:
                codepoint = None
            if codepoint is not None and codepoint not in excluded_codepoints:
                replacement = codepoint_to_char(codepoint)
        else:
            replacement = my_entities.get(ent)

        if replacement is None:
            # Keep the entity unescaped
            replacement = value[a:b+1]
        out.append(replacement)
        value = value[b+1:]

    new_value = ''.join(out)
    # Go back to being 8bit if that's required.
    if old_type == 'str':
        new_value = to_string(new_value)
    return new_value


# Generic data visualization

def dict_view(value, recursive_tables=True, allow_unknown_types=False):
    '''Visualize a dictionary containing lists or dictionaries in HTML.

    This routine produces an HTML visualization of the python structure given
    in "value".

    Parameters:
    - value: the structure to render. Dictionaries become <dl> lists,
      sequences become <ol> lists, sequences of dictionaries become a
      <table>, everything else is converted to text and HTML-quoted.
    - recursive_tables: if true, non-scalar table cells are rendered
      recursively. If false, a sequence containing such cells is not
      rendered as a table but as an <ol> list.
    - allow_unknown_types: see below.

    Returns the HTML as a string.

    Normally objects with unknown types will throw an assertion error. This
    behaviour can be changed by setting 'allow_unknown_types=True', which
    tries to get a string representation.

    >>> (dict_view({'my_key': 'my_value'}) ==
    ...  u'<dl>\\n <dt>my_key</dt><dd>my_value</dd>\\n</dl>\\n')
    True

    It escapes the content of each field cleanly in HTML.

    >>> (dict_view('<b>HTML "&lt;</b>') ==
    ...  u'&lt;b&gt;HTML &quot;&amp;lt;&lt;/b&gt;')
    True
    >>> (dict_view({
    ...      '<b>HTML "&lt;</b>': '<b>HTML "&lt;</b>'}) ==
    ...  u'<dl>\\n <dt>&lt;b&gt;HTML &quot;&amp;lt;&lt;/b&gt;</dt>'
    ...  u'<dd>&lt;b&gt;HTML &quot;&amp;lt;&lt;/b&gt;</dd>\\n</dl>\\n')
    True
    >>> (dict_view([
    ...      '<b>HTML "&lt;</b>', '<b>HTML "&lt;</b>']) ==
    ...  u'<ol>\\n <li>&lt;b&gt;HTML &quot;&amp;lt;&lt;/b&gt;</li>\\n '
    ...  u'<li>&lt;b&gt;HTML &quot;&amp;lt;&lt;/b&gt;</li>\\n</ol>\\n')
    True
    >>> (dict_view([
    ...      {'<b>A "&lt;</b>': '<b>1 "&lt;</b>'},
    ...      {'<b>B "&lt;</b>': '<b>2 "&lt;</b>'},
    ...      {'<b>C "&lt;</b>': '<b>3 "&lt;</b>'},
    ...  ]) ==
    ...  u'<table> '
    ...  u'<tr> <th> &lt;b&gt;A &quot;&amp;lt;&lt;/b&gt; </th> '
    ...  u'<th> &lt;b&gt;B &quot;&amp;lt;&lt;/b&gt; </th> '
    ...  u'<th> &lt;b&gt;C &quot;&amp;lt;&lt;/b&gt; </th> </tr>\\n'
    ...  u'<tr> <td> &lt;b&gt;1 &quot;&amp;lt;&lt;/b&gt; </td> </tr>\\n'
    ...  u'<tr> <td>  </td> <td> &lt;b&gt;2 &quot;&amp;lt;&lt;/b&gt; </td> '
    ...  u'</tr>\\n<tr> <td>  </td> <td>  </td> '
    ...  u'<td> &lt;b&gt;3 &quot;&amp;lt;&lt;/b&gt; </td> </tr></table>')
    True
    >>> (dict_view({
    ...      'unkown_type': sys.settrace
    ...  }, allow_unknown_types=True) ==
    ...  u'<dl>\\n <dt>unkown_type</dt>'
    ...  u'<dd>&lt;built-in function settrace&gt;</dd>\\n</dl>\\n'
    ... )
    True
    '''

    def my_get_type(obj):
        'Classify an object as "direct" (scalar), "list" or "dict".'
        if obj is None:
            return 'direct'
        t = get_type(obj)
        if t in ('str', 'unicode', 'int', 'long', 'float', 'bool',
                 'instance'):
            return 'direct'
        if t in ('list', 'tuple'):
            return 'list'
        if t in ('dict',):
            return 'dict'
        if allow_unknown_types:
            return 'direct'

        # Raised explicitly (instead of "assert False") so that the check
        # also works when Python runs with -O, which strips assert statements.
        raise AssertionError(
            "Type could not be determined for %s" % str((obj, t, )))

    def render_table(obj):
        '''Render a table (needs a sequence of dictionaries.)

        Returns an empty string if "obj" can not be shown as a table; the
        caller then falls back to a plain list.
        '''
        if len(obj) == 0:
            return ''
        # Initialize header
        try:
            h = list(obj[0].keys())
        except AttributeError:
            return ''
        h.sort()
        lines = []
        for item in obj:
            ll = [None, ] * len(h)
            if my_get_type(item) != 'dict':
                return ''
            for key in item.keys():
                value = item[key]
                if my_get_type(value) != 'direct':
                    if recursive_tables:
                        value = render(value)
                    else:
                        return ''
                else:
                    value = to_ustring(value)
                    value = html_quote(value)
                # Extend the header
                if key not in h:
                    h.append(key)
                    ll.append(None)
                # Column index:
                c = h.index(key)
                ll[c] = value
            # Format line. Rows are only as wide as the header was when they
            # were built; columns discovered later are not back-filled.
            line = ' '.join(['<td> %s </td>' % (a or '') for a in ll])
            lines.append(line)

        # Keys go through to_ustring() first, exactly like the <dt> keys in
        # render(), so that non-string keys (e.g. integers) can be quoted.
        header = ' '.join(
            ['<th> %s </th>' % html_quote(to_ustring(a)) for a in h])
        body = '\n'.join(['<tr> %s </tr>' % a for a in [header, ]+lines])
        return '<table> ' + body + '</table>'

    def render(obj):
        'Render this object and its sub-objects.'
        kind = my_get_type(obj)

        if kind == 'direct':
            return html_quote(to_ustring(obj))

        if kind == 'list':
            # Prefer a table; fall back to an ordered list.
            t = render_table(obj)
            if t:
                return t
            res = [' <li>'+render(a)+'</li>\n' for a in obj]
            return '<ol>\n' + ''.join(res) + '</ol>\n'

        if kind == 'dict':
            keys = list(obj.keys())
            keys.sort()
            res = [' <dt>' + html_quote(to_ustring(a)) + '</dt>'
                   '<dd>' + render(obj[a]) + '</dd>\n'
                   for a in keys
                   ]
            return '<dl>\n' + ''.join(res) + '</dl>\n'

    return render(value)


# HTML helpers

def html_process(body, processor=None, passthrough=None):
    '''Process a HTML document.

    Each tag level that the processor descends into is passed to a processor.
    The dictionary that this processor receives can be manipulated to do
    any conversion necessary.

    Parameters:
    - body: the HTML text. It is converted with to_ustring() before
      processing.
    - processor: a callable "processor(data, passthrough)" which returns the
      (possibly modified) data dictionary. The dictionary has the keys
      'tag', 'attrs', 'before', 'contents', 'after', 'left', 'right' and
      'parenttag'. If no processor is given, the data is left unchanged.
    - passthrough: an object which is handed to every processor call
      unchanged (defaults to a new empty dictionary).

    The result for each tag is built from the returned dictionary as
    before + left + (processed contents) + right; processing then continues
    with "after". The tag itself is only emitted if the processor puts it
    into "left"/"right".

    The tests show the tags, attributes and contents being passed down. the
    "before" segment is one which will not be passed through any more
    recursions. It is therefore the natural place to apply conversions.

    >>> def testing_processor(data, passthrough=None):
    ...     data['unquote'] = repr(
    ...         html_unquote(data['before'], protect_tags=True)).strip("u'")
    ...     print(u"b={before} u={unquote} a={after} l={left} c={contents} "
    ...           u"r={right} p={parenttag} t={tag}".format(**data))
    ...     return data
    >>> html_process(u'<tag attribute="value">'
    ...              u'<nested>text <br/>content &auml;</nested></tag>',
    ...              processor=testing_processor) == u'text content &auml;'
    b= u= a= l= c=<nested>text <br/>content &auml;</nested> r= p=None t=tag
    b= u= a= l= c=text <br/>content &auml; r= p=tag t=nested
    b=text  u=text  a=content &auml; l= c= r= p=nested t=br
    b=content &auml; u=content \\xe4 a= l= c= r= p=nested t=
    True

    The processor is robust against broken HTML, like unclosed tags:
    >>> html_process(
    ...     u'<corrupt>opened tag does not close',
    ...     processor=testing_processor) == u'opened tag does not close'
    b= u= a= l= c=opened tag does not close r= p=None t=corrupt
    b=opened tag does not close u=opened tag does not close a= l= c= r= \
p=corrupt t=
    True

    Wrong nesting leads to the second closing tag being ignored.
    >>> html_process(u'<wrong><nesting> of tags </wrong></nesting>',
    ...              processor=testing_processor) == u' of tags '
    b= u= a=</nesting> l= c=<nesting> of tags  r= p=None t=wrong
    b= u= a= l= c= of tags  r= p=wrong t=nesting
    b= of tags  u= of tags  a= l= c= r= p=nesting t=
    b= u= a= l= c= r= p=None t=
    True

    Incomplete tags are ignored completely.
    >>> html_process(u'abc < abc',
    ...              processor=testing_processor) == u'abc &lt; abc'
    b=abc &lt; u=abc &lt; a= abc l= c= r= p=None t=
    b= abc u= abc a= l= c= r= p=None t=
    True
    >>> html_process(u'abc<>abc',
    ...              processor=testing_processor) == u'abcabc'
    b=abc u=abc a=abc l= c= r= p=None t=
    b=abc u=abc a= l= c= r= p=None t=
    True
    >>> html_process(u'abc>abc',
    ...              processor=testing_processor) == u'abc&gt;abc'
    b=abc&gt;abc u=abc&gt;abc a= l= c= r= p=None t=
    True
    >>> html_process(u'<<><<<<b>X</b>',
    ...              processor=testing_processor) == u'X'
    b= u= a= l= c=<<<<b>X</b> r= p=None t=<
    b= u= a= l= c=X</b> r= p=< t=<<<b
    b=X u=X a= l= c= r= p=<<<b t=
    True

    >>> html_process(u'<p>a<p><p><strong>b</strong></p>',
    ...              processor=testing_processor) == u'ab'
    b= u= a= l= c=a<p><p><strong>b</strong></p> r= p=None t=p
    b=a u=a a= l= c=<p><strong>b</strong></p> r= p=p t=p
    b= u= a= l= c=<strong>b</strong> r= p=p t=p
    b= u= a= l= c=b r= p=p t=strong
    b=b u=b a= l= c= r= p=strong t=
    True

    >>> html_process(u'<p><p><p><p><p><p><p><p><p><p><p>'
    ...     u'<p><p><p><p><p><p><p><p><p><p><p>'
    ...     u'<p><p><p><p><p><p><p><p><p><p><p>'
    ...     u'<p><p><p><p><p><p><p><p><p><p><p>'
    ...     u'<p><p><p><p><p><p><p><p><p><p><p>'
    ...     u'<p><p><p><p><p><p><p><p><p><p><p>A'
    ...     u'</p></p></p></p></p></p></p></p></p></p></p>'
    ...     u'</p></p></p></p></p></p></p></p></p></p></p>'
    ...     u'</p></p></p></p></p></p></p></p></p></p></p>'
    ...     u'</p></p></p></p></p></p></p></p></p></p></p>'
    ...     u'</p></p></p></p></p></p></p></p></p></p></p>'
    ...     u'</p></p></p></p></p></p></p></p></p></p></p>',
    ... ) == u'A'
    True
    '''

    if passthrough is None:
        passthrough = {}

    def nil_processor(data, passthrough=None):
        # Default processor: hand the data back untouched.
        return data

    if not processor:
        processor = nil_processor

    def handle_one_tag(value, passthrough=None, parenttag=None):
        # Consume the text up to and including the first tag in "value".
        # Returns a pair (processed output, remaining unprocessed text).
        if passthrough is None:
            passthrough = {}
        # find next tag from start
        a = value.find('<')
        if a == -1:
            # break recursion
            # no tag was found. It is still possible that a '>' slipped in.
            value = value.replace('>', '&gt;')
            # (simplified processor)
            data = {
                'tag': '',
                'attrs': {},
                'before': value,
                'contents': '',
                'after': '',
                'left': '',
                'right': '',
                'parenttag': parenttag,
            }
            data = processor(data, passthrough)
            return data['before'], ''

        # Label everything up to the beginning of the tag as "before"
        before = value[:a]

        # end of tag
        b = value.find('>', a)
        if b == -1:
            # treat as an empty tag
            # (with b == a the slice value[a+1:b] below is empty)
            b = a
            # no '>' found means that we emit ''&lt;'
            before += '&lt;'

        tag = value[a+1:b].strip()

        # tag starts with "/": indication of wrong nesting
        if tag.startswith('/'):
            # treat as an empty tag, to prevent wrong lookups
            tag = ''

        # Singleton or empty tag?
        if len(tag) == 0 or tag.endswith('/'):
            singleton = True
            # Drop the trailing "/" (a no-op for the empty tag).
            tag = tag[:-1].strip()
        elif tag.lower() == 'br':
            singleton = True
        else:
            singleton = False
        # Separate the tag name from its attribute string, if there is one.
        try:
            tag, attr_str = tag.split(None, 1)
        except ValueError:
            attr_str = ''

        # Build attributes dictionary
        attrs = attr_dict(attr_str)

        # find correct closing tag
        # "nest" counts opening tags of the same name that were seen after
        # this one and have not been closed yet; "cursor" is the position
        # from which the search continues.
        # NOTE: both searches are plain prefix matches on the tag name, so
        # '<p' also matches e.g. '<pre'.
        nest = 0
        cursor = b+1
        c = cls = opn = None
        while True and not singleton:
            cls = value.find('</'+tag, cursor)
            if cls == -1:
                # No closing tag at all: pretend it sits at the very end.
                cls = len(value)
            opn = value.find('<'+tag,  cursor)
            if nest > 0 and (opn == -1 or cls < opn):
                # next tag is closing, decrease nesting
                cursor = cls+1+len(tag)
                nest -= 1
                continue
            if opn == -1:
                # No further opening tags:
                c = cls
                break
            if opn < cls:
                # Opening tag found
                nest += 1
                cursor = opn+1+len(tag)
                continue
            if nest == 0:
                # Closing tag found, no nesting:
                c = cls
                break
            nest -= 1

        if singleton:
            # Singleton? Assume immediate
            after = value[b+1:]
            contents = ''
        elif c == -1:
            # no matching tag? assume end
            # NOTE(review): the loop above replaces a "not found" result by
            # len(value), so c does not appear to ever be -1 here; the
            # branch below yields the same result in that case.
            after = ''
            contents = value[b+1:]
        else:
            # Skip to the end of the closing tag.
            d = value.find('>', c)
            if d == -1:
                d = len(value)
            after = value[d+1:]
            contents = value[b+1:c]

        # Process the parts.
        # We hand over the tag, the attributes,
        # the parts before and after, and the contents.
        data = {
            'parenttag': parenttag,
            'tag': tag,
            'attrs': attrs,
            'before': before,
            'contents': contents,
            'after': after,
            'left': '',
            'right': '',
        }
        data = processor(data, passthrough)
        # The contents are processed recursively, with this tag as parent.
        return (data['before'] +
                data['left'] +
                handle_tags(data['contents'], passthrough, parenttag=tag) +
                data['right'],
                data['after'])

    def handle_tags(value, passthrough, parenttag=None):
        # Process "value" tag by tag until nothing is left over.
        rest = value
        result = ''
        while len(rest):
            handled, rest = handle_one_tag(
                rest, passthrough, parenttag=parenttag)
            result += handled
        return result

    body = to_ustring(body)

    # collapse blanks. DANGER: this destroys <pre> and pretty formatting,
    # because it removes all \n's!
    # body = ' '.join(body.split())

    body = handle_tags(body, passthrough)

    return body


def html_cleanup(body, custom_tags=None, valid=None, purge_tags=None):
    '''Remove unwanted tags from HTML.

    Parameters:
    - body: the HTML text to clean up.
    - custom_tags: optional dictionary of additional accepted tags, in the
      same format as "valid". It is merged over "valid".
    - valid: optional dictionary which maps each accepted tag name to the
      list of attributes which are kept for it. A list entry is either an
      attribute name or a pair (name, validator), where the validator is
      called with the attribute value and returns the value to emit. The
      special entry '_singleton' marks tags which are emitted self-closing.
      If not given, a default set of harmless tags is used. The dictionary
      passed in is not modified.
    - purge_tags: optional list of tag names which are removed together
      with their contents. Text in front of such a tag is kept.

    Tags which are neither valid nor purged are dropped, but their contents
    are kept.

    This routine also unquotes HTML entities along the way:
    >>> html_cleanup(
    ...     '<style>#foobar{color: white;}</style><h1>Title</h1>',
    ...     purge_tags=['style']
    ... ) == '<h1>Title</h1>'
    True
    >>> html_cleanup(
    ...     '<p>Hello <style>x</style>world</p>',
    ...     purge_tags=['style']
    ... ) == '<p>Hello world</p>'
    True
    >>> (html_cleanup('<h1>Title &gt; Umlaut A <br/> is &Auml;</h1>') ==
    ...  u'<h1>Title &gt; Umlaut A <br/> is \\xc4</h1>')
    True

    >>> (html_cleanup('<h1>Title</h1><p>Dangerous: '
    ...               '<a href="blank">click</a></p>') ==
    ...  u'<h1>Title</h1><p>Dangerous: click</p>')
    True
    >>> (html_cleanup('<h1>Title</h1><p>Dangerous: '
    ...               '<script>alert(1)</script></p>') ==
    ...  u'<h1>Title</h1><p>Dangerous: alert(1)</p>')
    True
    >>> (html_cleanup('<h1>Title</h1><p>Dangerous: '
    ...               '<><<<<script>alert(1)</script></p>') ==
    ...  u'<h1>Title</h1><p>Dangerous: alert(1)</p>')
    True
    >>> (html_cleanup("&#x3c;script&#x3e;alert(1)&#x3c;/script&#x3e;") ==
    ...  u'&#x3c;script&#x3e;alert(1)&#x3c;/script&#x3e;')
    True
    '''
    # Example for an attribute validator:
    # def src_validator(val):
    #     # Forbid external URLs
    #     if val.find('//') != -1: return 'INVALID'
    #     # Path correction (app specialty)
    #     val = val.replace('../', '')
    #     return val

    if purge_tags is None:
        purge_tags = []

    if valid is None:
        # Define which tags (and which attributes to these tags) we will
        # accept. All others are discarded!
        valid = {
            'h1': [],
            'h2': [],
            'h3': [],
            'h4': [],
            'p': [],
            'br': ['_singleton'],
            'strong': [],
            'b':     [],
            'em':     [],
            'i':     [],
            'pre':   [],
            'code':   [],
            'table':  ['width'],
            'tr':     ['width'],
            'td':     ['colspan', 'rowspan', 'width'],
            'ol':     [],
            'ul':     [],
            'li':     [],
            'dl':     [],
            'dt':     [],
            'dd':     [],

            # 'span':   ['class'],
            # 'img':    [('src', src_validator)],
        }
    else:
        # Work on a copy, so that merging the custom tags below does not
        # modify the dictionary owned by the caller.
        valid = dict(valid)

    # Mix with dictionary of user-defined tags
    if custom_tags:
        valid.update(custom_tags)

    def keep_value(val):
        'Default attribute validator: accept the value unchanged.'
        return val

    def remover(data, passthrough=None):
        '''Processor: Drop all tags except a selected few.'''

        # Convert HTML entities into unicode
        clean = html_unquote(data['before'], protect_tags=True)
        # Get rid of "\r" and control codes
        clean = clean.replace('\r', '')
        clean = clean.replace('\x04', '')
        clean = clean.replace('\x05', '')
        data['before'] = clean

        tag = data['tag']

        # Purge tags which are supposed to be purged: only the tag and its
        # contents go away, the text in front of the tag ("before") stays.
        if tag in purge_tags:
            data['contents'] = ''
            return data

        # Detect invalid tags: they are not emitted (left/right stay empty),
        # their contents are still processed.
        if tag not in valid:
            return data

        allowed = valid[tag]

        # Check valid attributes:
        new_attrs = {}
        for attr in allowed:

            if isinstance(attr, (tuple, list)):
                attr, validator = attr
            else:
                validator = keep_value

            # '_singleton' is a marker for this routine, not an attribute.
            if attr == '_singleton':
                continue

            if attr in data['attrs']:
                new_attrs[attr] = validator(data['attrs'][attr])

        if '_singleton' in allowed:
            data['left'] = '<%s%s/>' % (tag, attr_string(new_attrs))
            data['right'] = ''
        else:
            data['left'] = '<%s%s>' % (tag, attr_string(new_attrs))
            data['right'] = '</%s>' % tag

        return data

    body = html_process(body, remover)
    return body


def html_to_md(html_in):
    '''Translate a piece of HTML into markdown text.

    The conversion is delegated to a fresh html2text.HTML2Text converter
    with its default settings.

    Pass HTML code to receive markdown text:

    >>> (html_to_md(u'<p>This is a simple paragraph.</p>') ==
    ...  u'This is a simple paragraph.\\n\\n')
    True
    >>> (html_to_md(u'<ul><li>Item</li><li>Item</li></ul>') ==
    ...  u'  * Item\\n  * Item\\n\\n')
    True

    Unicode is passed transparently, so you should always use unicode for
    this program and encode the output to binary if necessary:

    >>> (html_to_md(u'<b>\\xf6\\xe4\\xfc\\xdf\\xe1\\xe9\\xed</b>') ==
    ...  u'**\\xf6\\xe4\\xfc\\xdf\\xe1\\xe9\\xed**\\n\\n')
    True

    Script tags are removed:

    >>> (html_to_md(u'<script>alert("Hello world!")</script>') ==
    ...  u'\\n\\n')
    True

    No strict syntax checking. The tool makes a best effort:

    >>> (html_to_md(u'<h1>unclosed<br><p>wrong closing tag</b>') ==
    ...  u'# unclosed  \\n\\nwrong closing tag**\\n\\n')
    True

    For more information, see the documentation of the module "html2text".
    '''
    return html2text.HTML2Text().handle(html_in)


def md_to_html(md_in):
    '''Render markdown (plain text) as HTML.

    The conversion is delegated to a fresh markdown.Markdown converter with
    its default settings.

    >>> (md_to_html(u'Hello\\n=====\\n\\nTo the world.') ==
    ...  '<h1>Hello</h1>\\n<p>To the world.</p>')
    True

    The converter will make best guesses. For instance if follow-up paragraphs
    in items are not correctly indented, the paragraph will be considered
    separate.

    >>> (md_to_html(u'- item\\n\\n   wrong indent\\n\\n- item') ==
    ...  u'<ul>\\n<li>item</li>\\n</ul>\\n<p>wrong indent</p>\\n'
    ...  u'<ul>\\n<li>item</li>\\n</ul>')
    True

    For more information, see the documentation of the module "markdown".
    '''
    return markdown.Markdown().convert(md_in)


# Attribute helpers

def attr_dict(attrstring):
    '''Convert an XML attribute string into key-value pairs.

    Values may be enclosed in double quotes or be unquoted, in which case
    they end at the next blank or at the end of the string. Attributes
    without a value ("singletons") are stored with an empty string as their
    value and a lower-cased name. Names of attributes which carry a value
    are stored as written.

    Parsing stops silently at a value whose opening double quote is never
    closed; attributes found up to that point are returned.

    >>> (attr_dict('a="123" b="456"') ==
    ...  {'a': '123', 'b': '456'})
    True
    >>> attr_dict('width=5 checked') == {'width': '5', 'checked': ''}
    True
    >>> attr_dict('a=123') == {'a': '123'}
    True
    >>> attr_dict('a=') == {'a': ''}
    True
    '''
    d = {}

    remainder = attrstring
    while remainder:
        ind = remainder.find('=')
        # No further equals sign. Treat remainder as singletons
        if ind == -1:
            for key in remainder.split():
                d[key.strip().lower()] = ''
            break

        keys = remainder[:ind].split()
        for key in keys[:-1]:
            # All but last are singletons
            d[key.strip().lower()] = ''
        # The last name belongs to the value. There may be no name at all
        # (input like '=x'); the value is then parsed and dropped.
        key = keys[-1] if keys else None

        remainder = remainder[ind+1:]

        # quoted? (slicing instead of indexing: remainder may be empty)
        if remainder[:1] == '"':
            remainder = remainder[1:]
            ind = remainder.find('"')
            if ind == -1:
                # illegal.
                break
        else:
            ind = remainder.find(' ')
            if ind == -1:
                # Last value: it extends to the end of the string.
                ind = len(remainder)
        value = remainder[:ind]
        remainder = remainder[ind+1:].strip()

        if key is not None:
            d[key] = value

    return d


def attr_string(attrs):
    '''Build an attribute string from a dictionary of attributes.

    The attributes are emitted sorted by name, each one preceded by a blank
    and with its value in double quotes. Double quotes inside a value are
    removed.

    >>> attr_string({'this': '1', 'that': 'something'})
    ' that="something" this="1"'
    '''
    return ''.join(
        ' %s="%s"' % (name, content.replace('"', ''))
        for name, content in sorted(attrs.items())
    )


# Objects (serialization)

def obj_hash(val, hash_function='sha1'):
    '''Build a hash value from a pickleable object.

    Parameters:
    - val: the object to hash. Dictionaries are converted to sorted item
      lists first (see flatten_dictionaries), so that the result does not
      depend on the order of their keys.
    - hash_function: name of the hash algorithm, passed on to string_hash.

    Returns the hex digest as a string.

    >>> obj_hash({'b': 123, 'z': 456, 'm': 789})
    'cd7dbcc1eb3c2fdc74c633a8c06ed7f704c722fe'
    '''
    ser = flatten_dictionaries(val)
    # Clumsy but repeatable string representation. Perhaps we can
    # "pickle" to get something better...
    # Encoded as UTF-8: on Python 3 the repr may contain non-ASCII
    # characters, which the ASCII codec would refuse. For a pure ASCII repr
    # the bytes (and therefore the hash) are the same in both codecs.
    string_representation = repr(ser).encode('utf-8')
    return string_hash(string_representation, hash_function=hash_function)


def string_hash(val, hash_function='sha1'):
    '''Build a hash value from a string.

    Parameters:
    - val: the data to hash, as accepted by hashlib (bytes).
    - hash_function: name of the algorithm. It must be one of
      hashlib.algorithms_available.

    Returns the hex digest as a string.

    Raises AssertionError if the hash function is not supported.

    >>> string_hash(b'abc')
    'a9993e364706816aba3e25717850c26c9cd0d89d'
    '''
    if hash_function not in hashlib.algorithms_available:
        raise AssertionError('hash_function not supported: %s' % hash_function)
    # hashlib.new() resolves every name listed in algorithms_available;
    # not all of them have a constructor of the same name in the module.
    digest = hashlib.new(hash_function, val)
    return digest.hexdigest()


def flatten_dictionaries(val):
    '''Convert all dictionaries in this object to sorted items
    lists.

    A dictionary is replaced by the list of its (key, value) pairs, sorted,
    with each value converted recursively. Everything that is not a
    dictionary is returned unchanged; in particular, dictionaries inside of
    lists or tuples are not converted.

    >>> flatten_dictionaries({'b': 123, 'z': 456, 'm': 789})
    [('b', 123), ('m', 789), ('z', 456)]
    >>> flatten_dictionaries({'b': {'y': 1, 'x': 2}, 'a': 0})
    [('a', 0), ('b', [('x', 2), ('y', 1)])]
    '''
    if not isinstance(val, dict):
        # Everything that's not a dictionary stays the same.
        return val

    # Sort first (on the original items, keys are unique so they alone
    # decide the order), then convert the values depth first.
    return [
        (key, flatten_dictionaries(value))
        for key, value in sorted(val.items())
    ]


# Evaluation

def safe_eval(value, **kw):
    '''
    Legacy support function: Wrapper for literal_eval.

    Keyword arguments are not supported; passing any raises an
    AssertionError. Otherwise the result of literal_eval(value) is returned.
    '''
    if not kw:
        return literal_eval(value)

    raise AssertionError("Keyword arguments unsupported")


def literal_eval(value):
    '''Literal evaluator (with a bit more power than PT).

    This evaluator is capable of parsing large data sets, and it has
    basic arithmetic operators included.

    Parameters:
    - value: a string (bytes or unicode) containing a Python expression, or
      an AST node which has been parsed already.

    Supported are string, bytes and number literals, None, True and False,
    tuples, lists and dictionaries, the binary operators + - * / % and the
    unary operators - and +.

    Returns the value of the expression.

    Raises Exception for unsupported node types and for names other than
    None, True and False, and KeyError for unsupported operators.
    '''
    _safe_names = {'None': None, 'True': True, 'False': False}
    if isinstance(value, STRING_TYPES):
        value = ast.parse(value, mode='eval')

    bin_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.Mod: operator.mod,
    }

    unary_ops = {
        ast.USub: operator.neg,
        ast.UAdd: operator.pos,
    }

    # Since Python 3.8 all literals are ast.Constant nodes. The older node
    # classes (Str, Bytes, Num, NameConstant) are deprecated there and
    # removed in Python 3.14, so they are only touched on older
    # interpreters, where the parser still produces them.
    constant_node = getattr(ast, 'Constant', None)
    if sys.version_info >= (3, 8):
        string_nodes = number_nodes = name_constant_nodes = ()
    else:
        string_nodes = (ast.Str, ast.Bytes)
        number_nodes = (ast.Num, )
        name_constant_nodes = (ast.NameConstant, )
    # Types accepted as the value of an ast.Constant (bool is an int).
    constant_types = STRING_TYPES + (int, float, complex)

    def _unsupported(node):
        return Exception('Unsupported type {}'.format(repr(node)))

    def _convert(node):
        if isinstance(node, ast.Expression):
            return _convert(node.body)
        elif constant_node is not None and isinstance(node, constant_node):
            if node.value is None or isinstance(node.value, constant_types):
                return node.value
            # E.g. the Ellipsis literal
            raise _unsupported(node)
        elif isinstance(node, string_nodes):
            return node.s
        elif isinstance(node, number_nodes):
            return node.n
        elif isinstance(node, name_constant_nodes):
            return node.value
        elif isinstance(node, ast.Tuple):
            return tuple(map(_convert, node.elts))
        elif isinstance(node, ast.List):
            return list(map(_convert, node.elts))
        elif isinstance(node, ast.Dict):
            return dict((_convert(k), _convert(v)) for k, v
                        in zip(node.keys, node.values))
        elif isinstance(node, ast.Name):
            if node.id in _safe_names:
                return _safe_names[node.id]
            # Any other name is not a literal. Do not let it evaluate to
            # None silently.
            raise _unsupported(node)
        elif isinstance(node, ast.BinOp):
            return bin_ops[type(node.op)](
                _convert(node.left), _convert(node.right))
        elif isinstance(node, ast.UnaryOp):
            return unary_ops[type(node.op)](_convert(node.operand))
        else:
            raise _unsupported(node)
    return _convert(value)


def json_eval(expression, jsonvar):
    '''Evaluate a Python expression against the JSON input in jsonvar and
    return the result encoded as JSON.

    NOTE: the expression is passed to eval() with the regular builtins
    available, so only trusted expressions must be handed in here.

    A simple example looks like this.

    >>> json_eval('data[0]', '{"data": [1, 2, 3]}')
    '1'

    If the JSON input is a dictionary, its keys become the variables
    visible to the expression. Any other type is exposed as the single
    variable "value".

    >>> json_eval('value[1]', '[12, 34, 56]')
    '34'

    The expression may return a tuple (entries separated by commas),
    which is then converted into a JSON list.

    >>> json_eval('value[0], value[2]', '[12, 34, 56]')
    '[12, 56]'

    A typical usage scenario is the extraction of topic and payload
    components from events.

    >>> json_eval('topic[0:6]',
    ...     '{"topic": ["puh", "site", "3", "kst", "922", '
    ...     '"machine", "VSP2451", "state", ""]}')
    '["puh", "site", "3", "kst", "922", "machine"]'

    >>> json_eval('topic[0:2] + topic[4:6]',
    ...     '{"topic": ["puh", "site", "3", "kst", "922", '
    ...     '"machine", "VSP2451", "state", ""]}')
    '["puh", "site", "922", "machine"]'
    '''
    decoded = json.loads(jsonvar)
    if isinstance(decoded, dict):
        namespace = decoded
    else:
        # Anything that is not a dictionary is reachable as "value".
        namespace = {'value': decoded}
    return json.dumps(eval(expression, namespace))


def parse_kwargs(value):
    '''Turn a string of python keyword arguments into a dictionary.
    - param 'value': a string representing keyword args, i.e., something
      that might be passed to a python function expecting **kwargs. The
      values have to be parseable by literal_eval (so no unsafe lambdas
      or recursion is allowed)
    - returns : a python dictionary

    >>> (parse_kwargs('a=1, b=2, c="test,\\\\"=4"') ==
    ...  {'a': 1, 'b': 2, 'c': 'test,"=4'})
    True
    '''

    assert isinstance(value, STRING_TYPES)

    # Wrapping the text into a dict(...) call lets the Python parser do
    # the tokenizing. The resulting tree has to be exactly
    #   Module -> Expr -> Call(func=dict, keywords=[keyword(arg, value)])
    tree = ast.parse('dict(%s)' % value)
    assert isinstance(tree, ast.Module)

    statements = list(ast.iter_child_nodes(tree))
    assert len(statements) == 1 and isinstance(statements[0], ast.Expr)

    expressions = list(ast.iter_child_nodes(statements[0]))
    assert len(expressions) == 1 and isinstance(expressions[0], ast.Call)

    # Only the keyword children of the call carry data; everything else
    # (the function name, positional arguments) is skipped.
    return dict(
        (node.arg, literal_eval(node.value))
        for node in ast.iter_child_nodes(expressions[0])
        if isinstance(node, ast.keyword)
    )


# Types and string manipulation

def same_type(a, b):
    '''Tell whether a and b have exactly the same type (subclasses do not
    count). Replacement for the "type" functionality missing in
    PythonScripts.
    '''
    type_a, type_b = type(a), type(b)
    return type_a is type_b


def get_type(a):
    '''Return the name of a's type as a plain string, e.g. 'int'.

    The name is taken from str(type(a)) with the "<type '...'>" or
    "<class '...'>" wrapping stripped.
    '''
    name = str(type(a))
    for prefix in ("<type '", "<class '"):
        if name.startswith(prefix):
            # Drop the prefix and the trailing "'>".
            name = name[len(prefix):-2]
    return name


def cleanup_string(name,
                   valid_chars=string.printable,
                   replacement_char='_',
                   merge_replacements=True,
                   invalid_chars=''):
    '''Sanitize a name.

    Characters that are in valid_chars and not in invalid_chars are
    kept. Every other character is replaced by replacement_char, or
    dropped if replacement_char is empty. With merge_replacements set,
    a run of adjacent illegal characters yields a single replacement.
    '''
    pieces = []
    just_replaced = False
    for char in name:
        if char in valid_chars and char not in invalid_chars:
            pieces.append(char)
            just_replaced = False
        elif replacement_char and not just_replaced:
            pieces.append(replacement_char)
            # Only remember the replacement when runs are to be merged.
            just_replaced = bool(merge_replacements)
    return ''.join(pieces)


def normalize_string(name):
    '''Reduce a string with accented characters to ASCII bytes.

    The text is decomposed (NFKD); a combining diaeresis becomes an
    appended "e" and the sharp s becomes "ss". Whatever is still not
    ASCII afterwards is dropped.

    >>> (normalize_string(u'Anton\\xedn_Dvo\\u0159\\xe1k_Allegro') ==
    ...  b'Antonin_Dvorak_Allegro')
    True

    >>> (normalize_string(u'\\xc4rger \\xfcber sch\\xf6n ist h\\xe4\\xdflich')
    ...  == b'Aerger ueber schoen ist haesslich')
    True
    '''
    text = to_ustring(name)
    decomposed = unicodedata.normalize('NFKD', text)
    # German umlauts: the combining diaeresis turns into a trailing "e"
    decomposed = decomposed.replace(u'\u0308', u'e')
    decomposed = decomposed.replace(u'\xdf', 'ss')
    return decomposed.encode('ascii', 'ignore')


def to_ustring(value, enc='utf-8'):
    '''Convert any string (bytes or unicode) into unicode.

    - param 'value': unicode is returned unchanged, bytes are decoded
      using 'enc' (undecodable bytes are ignored). Any other object is
      converted with str() first.
    - param 'enc': encoding used for decoding bytes.
    - returns : a unicode string
    - raises : ValueError if the value cannot be converted.
    '''
    if isinstance(value, six.text_type):
        return value
    if isinstance(value, six.binary_type):
        return value.decode(enc, 'ignore')

    try:
        # Hand the requested encoding down: on Python 2 str() yields
        # bytes, which have to be decoded like any other byte string.
        return to_ustring(str(value), enc=enc)
    except Exception:
        pass
    raise ValueError("could not convert '%s' to ustring!" % str((value,)))


def to_string(value, enc='utf-8'):
    '''Deliver the native str type: bytes in python2, unicode in python3.

    Native strings pass through; the other string type is encoded or
    decoded using enc; any other object goes through str(). Raises
    ValueError if str() fails.
    '''
    if isinstance(value, str):
        return value

    if isinstance(value, six.text_type):
        converted = value.encode(enc)
    elif isinstance(value, six.binary_type):
        converted = value.decode(enc)
    else:
        failed = False
        try:
            converted = str(value)
        except Exception:
            failed = True
        if failed:
            raise ValueError(
                "could not convert '%s' to string!" % str((value,)))
    return converted


def to_bytes(value, enc='utf-8'):
    '''This method delivers bytes (encoded strings).

    - param 'value': memoryview and buffer objects are copied into
      bytes, bytes are returned unchanged, unicode is encoded using
      'enc'. Any other object is converted with str() first.
    - param 'enc': encoding used for encoding unicode.
    - returns : a byte string
    - raises : ValueError if the value cannot be converted.
    '''
    if isinstance(value, memoryview):
        return value.tobytes()
    if isinstance(value, buffer):
        # Convert the object itself, not the "buffer" type.
        return bytes(value)
    if isinstance(value, six.binary_type):
        return value
    if isinstance(value, six.text_type):
        return value.encode(enc)
    try:
        # Hand the requested encoding down to the recursive call.
        return to_bytes(str(value), enc=enc)
    except Exception:
        pass
    raise ValueError("could not convert '%s' to bytes!" % str((value,)))


def to_cssclassname(value):
    '''Ensure a valid css class name.

    An empty value yields ''. If the first character is neither a
    letter, "-" nor "_", an underscore is prepended. After that all
    characters other than letters, digits, "-" and "_" are dropped.
    '''
    first_chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_"
    allowed = first_chars + "0123456789"

    if not value:
        return ''

    if value[0] not in first_chars:
        candidate = '_' + value
    else:
        candidate = value

    return ''.join(char for char in candidate if char in allowed)


def data_cleanup(data, enc='utf-8', custom_transforms=None):
    '''Universal cleanup procedure for different data types.

    The handler is chosen by the type name of data (as reported by
    get_type). None, str, unicode, int, long, bool and float pass
    through unchanged. Lists and dicts are cleaned up recursively and
    in place; for dicts this covers keys as well as values. Tuples are
    cleaned up recursively and returned as lists. Every other type is
    converted with to_string.

    - param 'data': the value to clean up
    - param 'enc': encoding handed to to_string
    - param 'custom_transforms': optional dictionary mapping type names
      to functions taking the value and returning its replacement.
      These take precedence over the built-in handlers.
    - returns : the cleaned up value
    '''
    if custom_transforms is None:
        custom_transforms = {}

    def handle_default(data):
        return to_string(data, enc=enc)

    def handle_dict(data):
        # Iterate over a snapshot: keys are inserted and deleted below,
        # which must not happen while iterating over the live dict.
        for key, val in list(data.items()):
            newval = data_cleanup(
                val, custom_transforms=custom_transforms, enc=enc)
            newkey = data_cleanup(
                key, custom_transforms=custom_transforms, enc=enc)
            if newkey != key:
                data[newkey] = newval
                del data[key]
            else:
                data[key] = newval
        return data

    def handle_list(data):
        for i in range(len(data)):
            data[i] = data_cleanup(
                data[i], custom_transforms=custom_transforms, enc=enc)
        return data

    def handle_tuple(data):
        return handle_list(list(data))

    def handle_direct(data):
        return data

    my_type = get_type(data)

    transformations = {
        'NoneType': handle_direct,
        'str': handle_direct,
        'unicode': handle_direct,
        'int': handle_direct,
        'long': handle_direct,
        'bool': handle_direct,
        'float': handle_direct,
        'list': handle_list,
        'dict': handle_dict,
        'tuple': handle_tuple,
    }
    transformations.update(custom_transforms)

    transform = transformations.get(my_type, handle_default)
    return transform(data)


def tokenize_quoted(value, quotes="'", separators=" ",
                    backslash_mode=False,
                    append_separators=False,
                    merge_separators=True):
    '''Split <value> by <separators>, respecting strings held together by
    <quotes>.

    Quotes can be used within quoted strings by duplicating them
    (default) or by prepending them with \\ (in backslash_mode). If
    several different quoting symbols are given, only the quoting
    character that was actually used needs to be quoted by
    duplication.

    A quote only starts a quoted string at the beginning of a token.
    With append_separators, each separator is appended to the token in
    front of it. With merge_separators switched off, every separator
    delimits a (possibly empty) token.

    Returns a list of tokens (strings).

    >>> tokenize_quoted("one two three 'four with blank'")
    ['one', 'two', 'three', 'four with blank']

    >>> tokenize_quoted("one  two   three")
    ['one', 'two', 'three']

    >>> tokenize_quoted('abcd"e\\ne"f\\nghi', quotes='"', separators="\\n")
    ['abcd"e', 'e"f', 'ghi']

    >>> s = 'one two "three\\\\"four\\\\\\\\five"'
    >>> print(s)
    one two "three\\"four\\\\five"
    >>> tokenize_quoted(s, quotes='"', backslash_mode=True)
    ['one', 'two', 'three"four\\\\five']

    An empty quoted string is a valid token in backslash_mode, too:
    >>> tokenize_quoted('a ""', quotes='"', backslash_mode=True)
    ['a', '']

    Separate standard CSV lines:
    >>> tokenize_quoted(';1;;23;', separators=';', merge_separators=False)
    ['', '1', '', '23', '']
    >>> tokenize_quoted(';;;;', separators=';', merge_separators=False)
    ['', '', '', '', '']
    >>> tokenize_quoted('1;2;3;4;5', separators=';', merge_separators=False)
    ['1', '2', '3', '4', '5']
    '''

    tokens = []
    remainder = value

    # Repeat until nothing is left to consume.
    while remainder:
        # Search for the beginning of each token by skipping over separators.
        if remainder[0] in separators:
            if append_separators and len(tokens):
                tokens[-1] += remainder[0]
            remainder = remainder[1:]
            if merge_separators:
                continue
            elif len(tokens) == 0:
                # Special case where the string starts with a separator.
                tokens.append('')

        token = ''

        # If the first character of a token is a quote,
        # store that quoting character.
        if len(remainder) > 0 and remainder[0] in quotes:
            quote = remainder[0]
            remainder = remainder[1:]
            # If quoted: search for next quote character;
            # if escaped, make singleton and continue.
            while True:
                ind = remainder.find(quote)
                if ind == -1:
                    # Mustn't happen here. We guess that this is the
                    # last token.
                    if backslash_mode:
                        # still replace all double-backslashes by backslashes
                        remainder = remainder.replace('\\\\', '\\')
                    token += remainder
                    remainder = ''
                    break
                if backslash_mode:
                    # Count the backslashes directly in front of the quote.
                    # An odd number means the quote itself is escaped. This
                    # also has to work when the quote is the very first
                    # character or when backslashes are all there is in
                    # front of it.
                    head = remainder[:ind]
                    bscount = len(head) - len(head.rstrip('\\'))
                    if bscount % 2 == 1:  # quoted quoting character
                        token += (remainder[:ind-1].replace('\\\\', '\\')
                                  + quote)
                        remainder = remainder[ind+1:]
                        continue
                    else:
                        token += remainder[:ind].replace('\\\\', '\\')
                        remainder = remainder[ind+1:]
                        break
                else:
                    if remainder[ind:].startswith(quote+quote):
                        # Duplicated quote: add everything up to here
                        # to the token
                        token += remainder[:ind+1]
                        remainder = remainder[ind+2:]
                        continue
                    else:
                        # Quote has finalized a token
                        token += remainder[:ind]
                        remainder = remainder[ind+1:]
                        break
        else:
            # If not quoted: search for next separator.
            while True:
                if not remainder:
                    break
                if remainder[0] in separators:
                    break
                token += remainder[0]
                remainder = remainder[1:]

        # Store the token and repeat until no tokens are left.
        tokens.append(token)

    # Return the token list.
    return tokens


def read_csv(data, delimiter=',', quotechar='"'):
    """
    Parse <data>, a list of strings, as CSV rows: each string is split
    by <delimiter>, respecting fields held together by <quotechar>.
    Returns a list of lists of strings.

    >>> read_csv(['foo,bar'])
    [['foo', 'bar']]
    >>> read_csv(['foo,bar', 'fizz,buzz'])
    [['foo', 'bar'], ['fizz', 'buzz']]
    """
    reader = csv.reader(data, delimiter=delimiter, quotechar=quotechar)
    return [row for row in reader]


def split_unescape(s, delim, escape='\\', unescape=True):
    """
    Split s at every delim that is not preceded by the escape character.
    With unescape set, the escape characters are removed from the
    result, otherwise they are kept. An escape character at the very
    end of s is always kept.

    >>> split_unescape('foo,bar', ',')
    ['foo', 'bar']
    >>> split_unescape('foo$,bar', ',', '$')
    ['foo,bar']
    >>> split_unescape('foo$$,bar', ',', '$', unescape=True)
    ['foo$', 'bar']
    >>> split_unescape('foo$$,bar', ',', '$', unescape=False)
    ['foo$$', 'bar']
    >>> split_unescape('foo$', ',', '$', unescape=True)
    ['foo$']
    """
    end_of_input = object()
    parts = []
    piece = []
    chars = iter(s)
    for char in chars:
        if char == escape:
            # The character behind an escape is taken literally.
            escaped = next(chars, end_of_input)
            if escaped is end_of_input:
                # Dangling escape at the end of the input
                piece.append(escape)
            elif unescape:
                piece.append(escaped)
            else:
                piece.extend((escape, escaped))
        elif char == delim:
            # Split: finish the current piece and start a new one
            parts.append(''.join(piece))
            piece = []
        else:
            piece.append(char)
    parts.append(''.join(piece))
    return parts


def string_escape(s, chars, escape='\\'):
    '''Prefix every occurrence of one of "chars" in "s" with "escape".
    Occurrences of the escape character itself are doubled.'''
    # The escape character has to be handled first, otherwise the
    # escapes inserted below would be doubled as well.
    escaped = s.replace(escape, escape * 2)
    for char in chars:
        escaped = escaped.replace(char, escape + char)
    return escaped


# Passwords and encryption

# Deprecated method encrypt_pw. Replaced by secret_encrypt()
def encrypt_pw(password):
    '''This method is used to store encrypted passwords in the external
    user database instead of cleartext.

    Code is cited from AuthEncryption.py in Zope.

    - param 'password': the cleartext password. A value already starting
      with '{SHA}' is considered encrypted and returned as it is.
    - returns : '{SHA}' followed by the base64 encoded SHA-1 digest of
      the password (text is hashed in its UTF-8 encoding).
    '''
    if isinstance(password, bytes) and not isinstance(password, str):
        # Python 3 bytes: switch to text, so that the prefix check and
        # the concatenation below deal with one string type only.
        password = password.decode('utf-8')
    if password[:5] == '{SHA}':
        return password

    # hashlib needs bytes
    if isinstance(password, bytes):
        raw = password
    else:
        raw = password.encode('utf-8')
    # b2a_base64 appends a newline, which is cut off.
    encoded = binascii.b2a_base64(hashlib.sha1(raw).digest())[:-1]
    if not isinstance(encoded, str):
        # Python 3 delivers bytes here
        encoded = encoded.decode('ascii')
    return '{SHA}' + encoded


def base64decode(value):
    '''Return the binary data encoded in the base64 string value.'''
    decoded = binascii.a2b_base64(value)
    return decoded


def base64encode(value):
    '''Return value encoded as one line of base64, including the
    trailing newline added by binascii.'''
    encoded = binascii.b2a_base64(value)
    return encoded


def md5digest(data):
    '''Return the hexadecimal MD5 digest of data.

    Text is hashed in its UTF-8 encoding; data for which the conversion
    to bytes raises a TypeError is hashed as it is.
    '''
    try:
        payload = bytes(data, 'utf8')
    except TypeError:  # TODO: This should really be a PY2 distinction
        payload = data
    digest = hashlib.md5(payload)
    return digest.hexdigest()


# Additional functions for safer login methods:
# SHA and SSHA encryption for cookies and passwords
# os.urandom wrapper for cookie generation
# base64 wrappers

def secret_encrypt(secret, salt=None):
    '''Perform SHA or seeded SHA encryption according to RFC 2307 and
    return the resulting string for storage in a database.

    Without salt the result is '{SHA}' plus the base64 encoded digest.
    With a true value for salt, four random bytes are hashed after the
    secret and appended to the digest; the result is marked '{SSHA}'.

    >>> (secret_encrypt('my_secret', salt=None) ==
    ...  '{SHA}dYXR9865D9Cxq0LQpso5/PVQZcc=')
    True
    >>> (secret_encrypt('my_secret', salt=True)).startswith('{SSHA}')
    True
    '''
    hasher = hashlib.sha1(to_bytes(secret))
    if not salt:
        plain = base64.b64encode(hasher.digest())
        return '{SHA}' + to_string(plain).strip()

    salt_bytes = os.urandom(4)
    hasher.update(salt_bytes)
    # The salt is stored behind the digest, so the check can recover it.
    salted = base64.b64encode(hasher.digest() + salt_bytes)
    return '{SSHA}' + to_string(salted).strip()


def secret_check(encrypted, secret):
    '''Check a secret against its encrypted form.

    - param 'encrypted': the stored value, i.e. a scheme marker in
      braces followed by the base64 encoded digest. Anything behind the
      20 digest bytes is taken as the salt.
    - param 'secret': the cleartext secret to verify
    - returns : True if the secret matches, False otherwise

    >>> secret_check('{SHA}dYXR9865D9Cxq0LQpso5/PVQZcc=', 'my_secret')
    True
    >>> secret_check('{SSHA}PtXj4zEBsS0Rxz55sW+USwQizCZy4prJ', 'my_secret')
    True
    >>> secret_check('{SHA}XXXX9865D9Cxq0LQpso5/PVQZcc=', 'my_secret')
    False
    '''
    import hmac

    encrypted = to_bytes(encrypted)
    secret = to_bytes(secret)
    # Cut off the scheme marker
    encoded = encrypted[encrypted.find(b'}')+1:]
    challenge_bytes = base64.urlsafe_b64decode(encoded)
    # A SHA-1 digest has 20 bytes; the rest is the salt.
    digest = challenge_bytes[:20]
    hr = hashlib.sha1(secret)
    if len(challenge_bytes) > 20:
        salt = challenge_bytes[20:]
        hr.update(salt)
    # Constant-time comparison, so the time needed does not reveal how
    # many leading bytes of the digest were correct.
    return hmac.compare_digest(digest, hr.digest())


def generate_random_string(length=32, mode='normal', valid_chars=None):
    '''Generate a random string good enough for encryption purposes.

    - param 'length': number of characters to generate
    - param 'mode': 'normal' or 'digits'. In 'digits' mode only digits
      are used, regardless of valid_chars.
    - param 'valid_chars': the characters to choose from. If not given,
      ascii letters and digits are used.
    - returns : the random string
    - raises : ValueError for an unknown mode
    '''
    if mode not in ('normal', 'digits'):
        raise ValueError("invalid mode chosen: "+str(mode))

    if mode == 'digits':
        valid_chars = string.digits

    if valid_chars is None:
        valid_chars = string.ascii_letters + string.digits
    joiner = valid_chars[0:0]  # empty unicode or bytes, like valid_chars
    # Draw from the operating system's random source. The default
    # generator of the random module is predictable and must not be used
    # for secrets.
    rng = random.SystemRandom()
    return joiner.join([rng.choice(valid_chars) for _ in range(length)])


def get_uuid4():
    '''Return a freshly generated random (version 4) UUID as a string.'''
    new_id = uuid.uuid4()
    return str(new_id)

# Alternative implementation for older systems:
# def get_uuid4():
#    return os.popen('uuid -v 4', 'r').read().strip()


# More encodings: JSON


def json_encode(data):
    '''Serialize data into its JSON string representation.'''
    encoded = json.dumps(data)
    return encoded


def json_decode(json_string):
    '''Parse a JSON string and return the resulting data structure.'''
    decoded = json.loads(json_string)
    return decoded

# More encodings: YAML


def yaml_encode(data, default_flow_style=False):
    '''Serialize data into its YAML string representation.

    default_flow_style is handed on to yaml.dump.
    '''
    encoded = yaml.dump(data, default_flow_style=default_flow_style)
    return encoded


def yaml_decode(yaml_string):
    '''Parse a YAML string and return the resulting data structure.'''
    # NOTE(review): FullLoader accepts more than plain data types. If
    # this is ever fed with untrusted input, consider yaml.SafeLoader.
    loader = yaml.FullLoader
    return yaml.load(yaml_string, Loader=loader)

# More encodings: XML (1.0)


def xmlrpc_encode(data):
    '''Wrap "data" as the single parameter of an XML-RPC method response
    and return the XML text. None values are allowed.
    >>> print(xmlrpc_encode(['hello', 1.2, 34]))
    <?xml version='1.0'?>
    <methodResponse>
    <params>
    <param>
    <value><array><data>
    <value><string>hello</string></value>
    <value><double>1.2</double></value>
    <value><int>34</int></value>
    </data></array></value>
    </param>
    </params>
    </methodResponse>
    <BLANKLINE>
    '''
    params = (data,)
    return xmlrpc_dumps(params, methodresponse=1, allow_none=True)


def xmlrpc_decode(data):
    '''Decode XML-RPC text and return its first parameter.
    >>> xmlrpc_decode(xmlrpc_encode(['hello', 1.2, 34]))
    ['hello', 1.2, 34]
    '''
    decoded = xmlrpc_loads(data)
    # First entry of the result holds the parameters.
    params = decoded[0]
    return params[0]


# Sorting (topologically, dependency graphs and alike)


def topological_sort(graph):
    '''Sort a dependency graph so that dependent objects are placed after the
    objects they depend on.

    The graph must be formulated as a dictionary with keys defining nodes and
    values enumerating a list of nodes depending on the given node. The
    dictionary handed in is not modified.

    The algorithm works in the same way if reverse dependencies are used,
    giving a reversed ordering in that case.

    Nodes can be of any type, as long as the equality operator works.

    For the algorithm, see
    https://en.wikipedia.org/wiki/Topological_sorting#Kahn's_algorithm

    - returns : the list of nodes in sorted order
    - raises : ValueError if the graph contains a cycle

    Simple example:
    >>> graph = {
    ...     'a': ['b', 'c', ],
    ...     'b': ['d', ],
    ...     'c': ['d', ],
    ...     'd': ['e', ],
    ... }
    >>> topological_sort(graph)
    ['a', 'b', 'c', 'd', 'e']

    Nodes may be listed with an empty list of dependent nodes:
    >>> topological_sort({'a': ['b', ], 'b': []})
    ['a', 'b']

    Cyclic dependencies cannot be sorted:
    >>> graph = {
    ...     'a': ['b', ],
    ...     'b': ['c', ],
    ...     'c': ['b', ],
    ... }
    >>> topological_sort(graph)
    Traceback (most recent call last):
        ...
    ValueError: Graph is cyclic and cannot be sorted

    '''
    # Work on a shallow copy, so the caller's dictionary stays intact.
    remaining = dict(graph)

    def blocked_nodes():
        'All nodes still listed as depending on a remaining node.'
        return [node for deps in remaining.values() for node in deps]

    # Start out with all free nodes (not depending on any other node)
    dependent_nodes = blocked_nodes()
    free_nodes = [node for node in remaining
                  if node not in dependent_nodes]

    ordered = []

    while free_nodes:
        # Add a free node to the sorted list
        current = free_nodes.pop(0)
        ordered.append(current)
        if current not in remaining:
            # Pure leaf: only ever listed as a dependent node
            continue
        # Remove the node from the graph, even if nothing depends on it.
        # Otherwise it would be mistaken for part of a cycle below.
        dep_nodes = remaining.pop(current)
        if not dep_nodes:
            continue
        # Examine the remaining graph to see if a direct dependency
        # was the last dependency remaining.
        still_blocked = blocked_nodes()
        for dep_node in dep_nodes:
            # If there are no more dependencies, then this node may
            # be declared a free node (once, even if listed twice).
            if dep_node not in still_blocked and dep_node not in free_nodes:
                free_nodes.append(dep_node)

    # If there are any dependencies in the graph after appending all free
    # nodes, then these are cyclic dependencies, which cannot be sorted.
    if remaining:
        raise ValueError("Graph is cyclic and cannot be sorted")
    return ordered


# Diffing


def conserv_split(val, splitby='\n'):
    '''Split val at every splitby, keeping the separator at the end of
    each piece. An empty last piece is left out.'''
    pieces = val.split(splitby)
    # The last piece is the only one not followed by a separator.
    tail = pieces.pop()
    output = [piece + splitby for piece in pieces]
    if tail != '':
        output.append(tail)
    return output


def token_split(val):
    '''Split val into runs of characters of the same category.

    Letters and digits share one category, whitespace is a second one,
    the characters ".,;:" a third one, and everything else the fourth.
    Returns the list of runs as unicode strings.
    '''
    def category(char):
        if char.isalpha() or char.isdigit():
            return 'alnum'
        if char.isspace():
            return 'space'
        if char in '.,;:':
            return 'punct'
        return 'other'

    tokens = []
    current = u''
    current_cat = ''
    for char in to_ustring(val):
        char_cat = category(char)
        if current and char_cat != current_cat:
            # Category changed: the run collected so far is complete
            tokens.append(current)
            current = u''
        current += char
        current_cat = char_cat

    if current:
        tokens.append(current)

    return tokens


def diff_lines(a=None, b=None, use_tokens=False, max_range=10, test__=None):
    '''Compare two texts by splitting into lines and comparing those.
    Return in the form of blocks with "before", "oldtext", and "newtext"
    entries.

    - param 'a': the old text (None counts as empty)
    - param 'b': the new text (None counts as empty)
    - param 'use_tokens': compare the tokens delivered by token_split
      instead of lines
    - param 'max_range': how far ahead a line is searched for in the
      other text in order to resynchronize; a false value disables the
      limit
    - param 'test__': debugging aid. The value '1' replaces both texts by
      a built-in example; any true value makes the function return the
      blocks as text, one per line.
    - returns : a list of dictionaries. "before" holds the unchanged text
      in front of a change, "oldtext" and "newtext" the differing parts.

    >>> (diff_lines('Line1\\nLine2\\nLine3.',
    ...             'Line1\\nLine1a\\nLine2\\nLine3change.') ==
    ...  [{'newtext': u'Line1a\\n', 'oldtext': '', 'before': u'Line1\\n'},
    ...   {'newtext': u'Line3change.', 'oldtext': u'Line3.',
    ...    'before': u'Line2\\n'}])
    True
    '''
    if test__ == '1':
        a = 'Line1\nLine2\nLine3.'
        b = 'Line1\nLine1a\nLine2\nLine3change.'

    if a is None:
        a = ''
    if b is None:
        b = ''

    a = to_ustring(a)
    b = to_ustring(b)

    if use_tokens:
        lines_a = token_split(a)
        lines_b = token_split(b)
    else:
        lines_a = conserv_split(a)
        lines_b = conserv_split(b)

    blocks = []
    sametext = []
    oldtext = []
    newtext = []

    while lines_a or lines_b:

        # Next line of each text, None if that text is used up
        l_a = lines_a and lines_a[0] or None
        l_b = lines_b and lines_b[0] or None

        if l_a is not None and l_b is not None and l_a == l_b:
            # Match found. Close the former block
            if newtext or oldtext:
                blocks.append({
                    'before': ''.join(sametext),
                    'oldtext': ''.join(oldtext),
                    'newtext': ''.join(newtext),
                })
                sametext = []
                oldtext = []
                newtext = []

            sametext.append(l_a)
            lines_a.pop(0)
            lines_b.pop(0)
            continue

        # Not same: search in both directions
        try:
            ind_a = lines_a.index(l_b)
        except ValueError:
            ind_a = None
        try:
            ind_b = lines_b.index(l_a)
        except ValueError:
            ind_b = None

        # Enforce maximum range
        if max_range:
            if ind_a is not None and ind_a > max_range:
                ind_a = None
            if ind_b is not None and ind_b > max_range:
                ind_b = None

        # No match? The line simply differs
        if ind_a is None and ind_b is None:
            if l_a:
                oldtext.append(l_a)
                lines_a.pop(0)
            if l_b:
                newtext.append(l_b)
                lines_b.pop(0)
            continue

        # Choose the smaller index

        if ind_a is None or (ind_a is not None and
                             ind_b is not None and ind_b <= ind_a):
            # Hit found as ind_b in lines_b for l_a, thus we ignore l_a,
            # and append all lines_b up to the index to the newtext
            newtext.extend(lines_b[:ind_b])
            lines_b = lines_b[ind_b:]
            # Next line will close the block, because it is the same.
            continue

        if ind_b is None or (ind_a is not None and
                             ind_b is not None and ind_a < ind_b):
            # Hit found as ind_a in lines_a for l_b, thus we ignore l_b,
            # and append all lines_a up to the index to the oldtext
            oldtext.extend(lines_a[:ind_a])
            lines_a = lines_a[ind_a:]
            # Next line will close the block, because it is the same.
            continue

        raise ValueError("This should never happen: "
                         + str((ind_a, ind_b, l_a, l_b)))

    if sametext or newtext or oldtext:
        blocks.append({
            'before': ''.join(sametext),
            'oldtext': ''.join(oldtext),
            'newtext': ''.join(newtext),
        })

    if test__:
        # Blocks are dictionaries and have to be converted before they
        # can be joined into the debugging output.
        out = []
        for block in blocks:
            out.append(str(block)+'\n')
        return ''.join(out)

    return blocks


def diff_words(a=None, b=None, test__=None):
    '''Compare texts word for word, marking differences.
    Return a list of data structures for easy interactive patching.

    - param 'a': the old text (None counts as empty)
    - param 'b': the new text (None counts as empty)
    - param 'test__': debugging aid. The value '1' replaces both texts by
      a built-in example; any true value makes the function return a
      textual report instead of the block list.
    - returns : a list of dictionaries. Blocks of type 'change' carry
      'old' and 'new' (lists of (start, end, word) tuples referring to
      a and b), 'oldtext' and 'newtext' (the corresponding slices of a
      and b), 'repl_from' and 'repl_for' (start offset in a plus one,
      and length of the replaced span), 'before' (the text of b between
      the previous change and this one) and 'change' set to True. If
      text of b is left over behind the last change, a final block of
      type 'tail' holds it in 'before', with 'change' set to False.
    '''
    if test__ == '1':
        a = 'Das Schiff liegt nicht auf Reede, aussbooten mit alten Zodiacs.'
        b = ('Hinweis: Das Schiff liegt auf Reede, ausbooten mit guten '
             'alten Zodiacs. Zusatz.')

    if a is None:
        a = ''
    if b is None:
        b = ''

    def pos_split(val):
        # Split retaining the position information: returns a list of
        # (start, end, word) tuples for the whitespace separated words,
        # where end is the index just behind the word.
        out = []
        coll = ''
        pos = None
        for i in range(len(val)):
            c = val[i]
            if c.isspace():
                if coll:
                    out.append((pos, i, coll))
                    coll = ''
                    pos = None
                continue
            coll += c
            if pos is None:
                pos = i
        if coll:
            out.append((pos, len(val), coll))
        return out

    ta = pos_split(a)
    tb = pos_split(b)

    print_output = []

    if test__:
        print_output.append("Tokenizations:" + '\n')
        print_output.append(str(ta) + '\n')
        print_output.append(str(tb) + '\n')
        print_output.append('\n')

    # ia and ib are the indexes of the current word in ta and tb.
    ia = 0
    ib = 0
    blocks = []
    while True:
        if len(ta) <= ia:
            # rest of text has been added
            # (this block is also appended if tb is used up as well, in
            # which case its 'new' list is empty)
            # NOTE(review): the local "new" assigned here is not used
            # before it is reassigned further down.
            new = tb[ib:]
            blocks.append({'type': 'change',
                           'old': [(len(a), len(a), '')],
                           'new': tb[ib:]})
            break
        if len(tb) <= ib:
            # rest of text has been removed
            blocks.append({'type': 'change',
                           'old': ta[ia:],
                           'new': [(len(b), len(b), '')]})
            break

        if ta[ia][2] == tb[ib][2]:
            # in sync.
            ia += 1
            ib += 1
            continue

        # out of sync. search for next equal
        # The search radius is the smaller number of remaining words.
        ra = len(ta) - ia
        rb = len(tb) - ib
        radius = (ra < rb) and ra or rb
        # c is set as soon as a resynchronization point has been found.
        c = False
        for r in range(1, radius):
            for rr in range(0, r+1):
                if ta[ia+r][2] == tb[ib+rr][2]:
                    # words have been removed
                    old = ta[ia:ia+r]
                    new = tb[ib:ib+rr]
                    if not new:
                        # Nothing replaces the removed words: use an
                        # empty placeholder located at the current word
                        # of b.
                        new = [(tb[ib][0], (tb[ib][0] or 1)-1, '')]
                        if tb[ib][0] == 0:
                            # Removal in front of the very first word of
                            # b: extend the old span by one character
                            # and add a blank to its last word.
                            old[-1] = (old[-1][0], old[-1][1]+1,
                                       old[-1][2] + ' ')
                    blocks.append({'type': 'change',
                                   'old': old,
                                   'new': new})  # XXX
                    ia += r
                    ib += rr
                    c = True
                    break
                if ta[ia+rr][2] == tb[ib+r][2]:
                    # words have been added
                    old = ta[ia:ia+rr]
                    new = tb[ib:ib+r]
                    if not old:
                        # Pure insertion: use an empty placeholder
                        # located at the current word of a.
                        old = [(ta[ia][0], (ta[ia][0] or 1)-1, '')]
                        if ta[ia][0] == 0:
                            # Insertion in front of the very first word
                            # of a: extend the new span by one character
                            # and add a blank to its last word.
                            new[-1] = (new[-1][0], new[-1][1]+1,
                                       new[-1][2] + ' ')
                    blocks.append({'type': 'change',
                                   'old': old,
                                   'new': new})
                    ia += rr
                    ib += r
                    c = True
                    break
            if c:
                break
        if c:
            continue

        # end of text differs
        blocks.append({'type': 'change',
                       'old': ta[ia:],
                       'new': tb[ib:]})
        break

    if test__:
        print_output.append("Blocks:" + '\n')
        for block in blocks:
            print_output.append(str(block) + '\n')
        print_output.append('\n')

    # Highlight changes
    # pos is the position in b up to which the text has been covered by
    # the blocks handled so far.
    pos = 0
    for block in blocks:
        if block['type'] == 'change':
            block['change'] = True
            old, new = block['old'], block['new']
            if old and len(old):
                block['oldtext'] = a[old[0][0]:old[-1][1]]
            else:
                block['oldtext'] = ''

            if new and len(new):
                block['newtext'] = b[new[0][0]:new[-1][1]]
            else:
                block['newtext'] = ''

            # Start (offset plus one) and length of the span of a that
            # is replaced
            block['repl_from'] = old[0][0] + 1
            block['repl_for'] = old[-1][1] - old[0][0]

            if new and len(new):
                block['before'] = b[pos:new[0][0]]
                pos = new[-1][1]
            else:
                block['before'] = ''

    # Whatever is left of b behind the last change
    tail = b[pos:]
    if tail:
        blocks.append({'type': 'tail', 'before': tail, 'change': False, })

    if not test__:
        return blocks

    # Debugging info:

    print_output.append("Original texts:" + '\n')
    print_output.append(a + '\n')
    print_output.append(b + '\n')
    print_output.append('\n')

    print_output.append("Formatting hints:" + '\n')
    for block in blocks:
        print_output.append('\n')
        print_output.append("Before: "+block['before'] + '\n')
        print_output.append("Type: "+block['type'] + '\n')
        if block['type'] == 'change':
            print_output.append(
                '"%(oldtext)s" -> "%(newtext)s"' % block + '\n')
            print_output.append(
                "From %(repl_from)d for %(repl_for)d" % block + '\n')
        # print_output.append( str(block) + '\n')
    return ''.join(print_output)


# IP address calculus

def ip_match(ipaddr, network):
    '''Tell whether an IPv4 address belongs to a network.

    "network" is a dotted base address, optionally followed by a slash and
    either a prefix length ("/24") or a dotted netmask ("/255.255.255.0").
    Without a slash the network is treated as a single host ("/32").

    >>> ip_match('192.168.0.1', '192.168.0.0/24')
    True
    >>> ip_match('192.168.0.1', '192.168.0.1')
    True
    >>> ip_match('192.168.0.1', '192.168.1.0/24')
    False
    '''

    def dotted_to_int(dotted):
        'Fold the dot-separated decimal fields into a single integer.'
        value = 0
        for field in dotted.split('.'):
            value = (value << 8) + int(field)
        return value

    def netmask_to_int(spec):
        'Turn a prefix length or a dotted netmask into an integer mask.'
        try:
            prefix_len = int(spec)
        except ValueError:
            prefix_len = 0
        if prefix_len:
            all_ones = 0xffffffff
            return all_ones & (all_ones << (32 - prefix_len))
        # Not a non-zero number: read the spec like a dotted address.
        return dotted_to_int(spec)

    address = dotted_to_int(ipaddr)
    base, slash, mask_spec = network.partition('/')
    if not slash:
        # No explicit mask given: match exactly one host.
        mask_spec = '32'
    mask = netmask_to_int(mask_spec)
    return (address & mask) == (dotted_to_int(base) & mask)


# System calls

def safe_syscall(cmds, raisemode=False, stdin_data=None, drop_stderr=False,
                 text=True):
    '''Run a command and return a tuple (return code, output).

    cmds -- the command, either as a list of strings or as one string, which
        is split on whitespace (no shell quoting rules are applied).
    raisemode -- if true, a non-zero return code raises an AssertionError
        instead of being handed back. Use this when replacing os.system().
    stdin_data -- optional data that is fed to the command on stdin.
    drop_stderr -- if true, stderr is discarded; otherwise it is merged into
        the returned output.
    text -- passed to subprocess as "universal_newlines": Python3
        installations then expect input and deliver output as unicode using
        the system default encoding instead of bytes.

    Raises AssertionError if the command contains a non-string element, if
    the command could not be started or communicated with, or (with
    raisemode) if it returned a non-zero code.

    Test if missing executable throws an exception
    >>> safe_syscall(['/bin/nonexistent'])  # doctest: +ELLIPSIS
    Traceback (most recent call last):
        ...
    AssertionError: Error [Errno 2] No such file or directory...

    Test if stdin_data is correctly processed
    >>> (safe_syscall(['/bin/grep', 'test'],
    ...               stdin_data='asdf\\ntest1\\ntest2\\ntets') ==
    ...  (0, 'test1\\ntest2\\n'))
    True

    Test if drop_stderr is correctly processed
    >>> safe_syscall(['/usr/bin/tee', '/dev/stderr'],
    ...              stdin_data='test') == (0, 'testtest')
    True
    >>> safe_syscall(['/usr/bin/tee', '/dev/stderr'], stdin_data='test',
    ...              drop_stderr=True) == (0, 'test')
    True

    Test if unicode compatible
    >>> (to_ustring(safe_syscall(u'/bin/echo schräg')[1]) ==
    ...  u'schräg\\n')
    True
    >>> (to_ustring(safe_syscall([u'/bin/echo', u'schräg'])[1]) ==
    ...  u'schräg\\n')
    True

    Test if text is returned when requested
    >>> safe_syscall(['/bin/echo', 'schräg'])[1] == 'schräg\\n'
    True

    Test if binary mode also works
    >>> (safe_syscall(['/usr/bin/md5sum'], stdin_data=b'\\xff', text=False)[1]
    ...  == b'00594fd4f42ba43fc1ca0427a0576295  -\\n')
    True
    '''
    # Split command into list for safer parsing.
    if isinstance(cmds, STRING_TYPES):
        cmds = to_string(cmds).split()

    # Raise explicitly instead of using an assert statement, so the check is
    # still performed when Python runs with -O.
    for element in cmds:
        if not isinstance(element, STRING_TYPES):
            raise AssertionError("Non-string element in %s" % str(cmds))

    # Ensure encoding
    cmds = list(map(to_string, cmds))

    # Quoted form of the command, used by both error messages below.
    quoted_cmd = ' '.join(
        ["'%s'" % a.replace("'", "\\'") for a in cmds]
    )

    try:
        proc = subprocess.Popen(
            cmds,
            stdin=None if (stdin_data is None) else subprocess.PIPE,
            stdout=subprocess.PIPE,
            # A separate pipe whose content is never returned drops stderr;
            # STDOUT merges it into the captured output.
            stderr=subprocess.PIPE if drop_stderr else subprocess.STDOUT,
            universal_newlines=text,
        )
        output = proc.communicate(stdin_data)[0]
        retcode = proc.returncode
    except Exception as err:
        # Callers rely on AssertionError as the single failure type.
        raise AssertionError("Error %s on %s" % (str(err), quoted_cmd))

    if raisemode and retcode:
        raise AssertionError("return code %s [[%s]] on %s" %
                             (retcode, output, quoted_cmd))

    return retcode, output


# validation tools
def check_socket(host, port):
    '''Simple socket validation: try to open a connection to (host, port).

    Returns 1 if the connection could be established and None if the
    attempt failed for any reason. A timeout of four seconds is set on the
    socket. The socket is closed in either case.
    '''
    s = socket.socket()
    s.settimeout(4)
    try:
        s.connect((host, port))
    except Exception:  # socket.gaierror
        return None
    finally:
        # Close on failure as well, so no file descriptor is leaked.
        s.close()
    return 1


def validate_url(url):
    '''Check a URL by opening it in advance.

    Returns 1 if the URL could be opened and None if opening failed for any
    reason.
    '''
    try:
        response = urlopen(url)
    except Exception:
        # Covers ValueError, URLError, InvalidURL, HTTPError and IOError
        # alike: whatever is raised, the URL counts as invalid.
        # urllib also throws errors on 404, so all is well.
        return None
    else:
        response.close()
        return 1


def preview_url(url, with_headers=False):
    '''Simple URL opener. Returns the page, or None if the URL could not be
    opened.

    >>> urlpath = os.path.abspath(os.path.join(os.path.dirname(__file__),
    ...     'assets', 'tests', 'generic_preview_url.html'))
    >>> url = 'file://' + urlpath
    >>> preview_url(url).find(b'DOCTYPE')
    2

    Use the optional argument "with_headers=True" to receive a tuple
    (body, headers) instead of the body alone.

    >>> body, headers = preview_url(url, with_headers=True)
    >>> headers['Content-Type']
    'text/html'

    '''
    try:
        response = urlopen(url)
    except Exception:  # ValueError, URLError, InvalidURL, HTTPError,
        # May throw an IOError. If it does, the URL is invalid.
        # urllib also throws errors on 404, so all is well.
        return None
    try:
        data = response.read()
        if hasattr(response, 'headers'):
            # Python 3
            headers = response.headers
        else:
            # Python 2
            raw_headers = response.info().headers
            # Convert the raw "Name: value" lines to a dictionary. Split
            # only once so that values containing ': ' stay intact.
            headers = dict(
                line.strip().split(': ', 1) for line in raw_headers
            )
    finally:
        # Release the connection even if reading fails.
        response.close()
    if with_headers:
        return data, headers
    return data


def validate_domain(domain):
    """Check for valid domain name.

    A valid domain name consists of one or more labels separated by dots.
    Each label contains between 1 and 63 characters.
    Valid characters are a-z, 0-9 and '-'; the check is case-insensitive.
    Dotted IPv4 addresses are accepted as well, because they match the same
    pattern.

    Domain names using Punycode (for international domains) are currently
    not supported! (those starting with xn-- in ASCII representation,
    see rfc3492)

    Returns True if the name is valid, False otherwise.

    >>> validate_domain('127.0.0.1')
    True
    >>> validate_domain('localhost')
    True
    >>> validate_domain('perfact.de')
    True
    >>> validate_domain('ema.perfact.de')
    True
    >>> validate_domain('foo.bar.batz.ema.perfact.de')
    True

    >>> validate_domain('double..Dot')
    False
    >>> validate_domain('10.1.2.3,127.0.0.1:25')
    False
    >>> validate_domain('perfact.veryveryveryveryveryveryveryveryvery'
    ...                 'veryveryveryveryveryverylong.domain')
    False
    >>> validate_domain('@.not.allowed.character')
    False
    >>> validate_domain('perfact.de\\n')
    False

    """
    domain = domain.lower()
    # validate_ipaddr() cannot be used here because it accepts too much,
    # e.g. '127.0.0.1,127.0.0.2' or '192.168.2.1/24'.
    # TODO: add a way for this function to accept IPv6 as a valid domain.
    #
    # The pattern ends with \Z rather than $, because $ also matches just
    # before a trailing newline and would let 'example.com\n' pass.
    exp = "^([a-z0-9-]{1,63}\\.)*[a-z0-9-]{1,63}\\Z"
    return re.match(exp, domain) is not None


def validate_domain_list(domain_list, delimeter=','):
    """Validate a string that holds several domain names.

    The string is split at "delimeter" (default ','), surrounding whitespace
    is stripped from every element, and each element is passed to
    validate_domain. The result is True only if all elements are valid.

    >>> validate_domain_list('single.domain')
    True
    >>> validate_domain_list('first.domain, second.domain.name')
    True
    >>> validate_domain_list('127.0.0.1, mixed.ip.and.domain')
    True
    >>> validate_domain_list('other.delimeter/ localhost', delimeter='/')
    True
    >>> validate_domain_list('multiple.spaces,        localhost')
    True
    >>> validate_domain_list('no.spaces,localhost')
    True

    >>> validate_domain_list('mixed.valid.and.unvalid, double..dot, localhost')
    False
    """
    candidates = (part.strip() for part in domain_list.split(delimeter))
    # all() stops at the first invalid entry.
    return all(validate_domain(candidate) for candidate in candidates)


def validate_ipaddr(ipaddr):
    '''Check for valid IPv4/IPv6 addresses in VLSM/CIDR notation.

    The input is either a string of comma-separated addresses or a list of
    addresses. Every entry is normalized through ipaddress.ip_network, which
    raises ValueError for invalid input. A string input yields the
    normalized entries joined by ', '; any other input yields a list.

    >>> (validate_ipaddr('127.0.0.1') ==
    ...  u'127.0.0.1/32')
    True

    >>> (validate_ipaddr('8.8.8.8, 8.8.8.10/32') ==
    ...  u'8.8.8.8/32, 8.8.8.10/32')
    True

    >>> (validate_ipaddr(['192.168.0.0/24', '172.16.0.0/16']) ==
    ...  [u'192.168.0.0/24', u'172.16.0.0/16'])
    True

    >>> validate_ipaddr('::1') == u'::1/128'
    True
    '''
    given_as_string = isinstance(ipaddr, STRING_TYPES)
    if given_as_string:
        candidates = [part.strip() for part in to_ustring(ipaddr).split(',')]
    else:
        candidates = ipaddr

    # ip_network() raises ValueError on the first invalid entry.
    normalized = [
        to_ustring(ipaddress.ip_network(to_ustring(candidate)))
        for candidate in candidates
    ]

    if given_as_string:
        return ', '.join(normalized)
    return normalized


def validate_port(port):
    ''' Check if a valid port or a valid portrange is given.

    - A single port must lie between 1 and 65535.
    - Valid portranges are x:y (where x < y, both at most 65535).
    - A portrange can also be written as :y or x: (meaning 1:y or x:65535).
    - Negative numbers and anything else that is not made of digits and at
      most one ':' are rejected.

    Returns the port (range) as a string. Raises ValueError if the input is
    not valid.

    >>> validate_port(123)
    '123'

    >>> validate_port('80:443')
    '80:443'

    >>> validate_port(':80')
    ':80'

    >>> validate_port('80:')
    '80:'

    >>> validate_port('asdf')
    Traceback (most recent call last):
        ...
    ValueError: Port or Portrange is invalid!

    '''
    # We need a string for regular expression checking
    port = str(port)

    # special case ':'
    if port == ':':
        raise ValueError("Portrange is invalid. "
                         "At minimum one port must be defined!")

    # Portrange contains the separator ':'. The patterns end with \Z rather
    # than $, because $ would also accept a trailing newline.
    if re.match(r"^[0-9]*:[0-9]*\Z", port):
        first, last = port.split(':')
        # An omitted bound defaults to the lowest / highest port.
        first = int(first) if first else 1
        last = int(last) if last else 65535

        # check if both ports are in a valid range
        if first > 65535 or last > 65535:
            raise ValueError("A single port of a portrange must be < 65535! "
                             "(Got: %s, %s)" %
                             (first, last))

        # The range must be strictly ascending.
        if first >= last:
            raise ValueError("The second port of a portrange has to be "
                             "greater than the first!")
        return port

    # Single port: at least one digit, so the empty string ends up in the
    # generic error below instead of failing inside int().
    if re.match(r"^[0-9]+\Z", port):
        if 0 < int(port) < 65536:
            return port
        raise ValueError("Error in validation of single Port!")

    # No valid port or portrange
    raise ValueError("Port or Portrange is invalid!")


# Converting from/to version strings

def version_to_triplet(version):
    '''Safely convert any version string into a tuple
    of (major, minor, patchlevel).

    The string is split at dots. Only the first three fields are used,
    missing fields count as 0, and every field that is not a plain number
    becomes 0.

    >>> version_to_triplet('1.0.0')
    (1, 0, 0)

    >>> version_to_triplet('2')
    (2, 0, 0)

    >>> version_to_triplet('broken')
    (0, 0, 0)

    >>> version_to_triplet('1.a.b')
    (1, 0, 0)

    >>> version_to_triplet('a.b.1')
    (0, 0, 1)

    >>> version_to_triplet('1.2.3.4')
    (1, 2, 3)
    '''
    def to_number(field):
        'Return the field as an integer, or 0 if it is not a plain number.'
        if not field.isdigit():
            return 0
        # isdigit() also accepts characters such as superscript digits,
        # which int() refuses; treat those like any other non-number.
        try:
            return int(field)
        except ValueError:
            return 0

    fields = version.split('.')[:3]
    # Pad to exactly three fields.
    fields += ['0'] * (3 - len(fields))
    return tuple(to_number(field) for field in fields)


def triplet_to_version(triplet):
    '''Convert a version triplet in a tuple (major, minor, patchlevel)
    into a version string.

    Raises AssertionError if the triplet does not have exactly three
    elements.

    >>> triplet_to_version((1, 2, 3))
    '1.2.3'
    '''
    # Raise explicitly: an assert statement would be skipped under
    # "python -O" and let malformed triplets through.
    if len(triplet) != 3:
        raise AssertionError("Corrupt triplet")
    return '.'.join(map(str, triplet))


# --- Generic, but Zope-specific


def read_pdata(obj):
    '''Return the complete content stored in obj.data.

    If obj.data is a plain string (text or bytes), it is returned as is.
    Otherwise it is treated as the head of a linked chain whose nodes carry
    a "data" payload and a "next" pointer; the payloads are concatenated in
    order. An empty chain yields None.

    According to the original note, reading the chain this way avoids
    authentication problems with linked pdata.
    '''
    if type(obj.data) in (type(''), type(b'')):
        return obj.data

    joined = None
    node = obj.data
    while node is not None:
        if joined is None:
            # First chunk: start the result with its payload.
            joined = node.data
        else:
            joined += node.data
        node = node.next
    return joined


def get_property_or_method(context, name, acquire=True):
    '''Look up "name" on "context" and return its value.

    With acquire=True (the default) a plain getattr() is used. With
    acquire=False the value is read via context.getProperty(); if that
    yields None and "name" is listed in context.objectIds(), getattr() is
    used as a fallback.

    A callable result is called without arguments and its return value is
    handed back. If nothing is found, None is returned.
    '''
    if not acquire:
        found = context.getProperty(name, None)
        if found is None and name in context.objectIds():
            found = getattr(context, name, None)
    else:
        found = getattr(context, name, None)

    return found() if callable(found) else found


def safe_call(context, method):
    '''Fetch the attribute "method" from "context" and call it (useful for
    hooks).

    Returns the result of the call, or None if the attribute is missing,
    falsy or not callable.
    '''
    hook = getattr(context, method, None)
    if not hook or not callable(hook):
        return None
    return hook()


def load_module(filename, name='config'):
    '''Execute the python source file at "filename" and return it as a
    module object named "name".

    This is mainly useful for loading python configuration files and
    accessing the variables therein as attributes. On Python 2 the file is
    loaded through imp.load_source, otherwise through importlib's
    SourceFileLoader.

    >>> mod = load_module(
    ...     os.path.dirname(__file__) +
    ...     '/assets/tests/generic_config_module.py')
    >>> (mod.configval1, mod.configval2) == (12345, 'Hello world!')
    True
    '''
    if not six.PY2:
        source_loader = importlib.machinery.SourceFileLoader(name, filename)
        module_spec = importlib.util.spec_from_loader(
            source_loader.name, source_loader)
        module = importlib.util.module_from_spec(module_spec)
        # Run the file's code inside the fresh module object.
        source_loader.exec_module(module)
        return module

    return imp.load_source(name, filename)


def module_to_dict(mod):
    '''Collect the public attributes of a config module in a dictionary.

    Every name reported by dir(mod) that does not start with an underscore
    is included, mapped to its attribute value.

    >>> mod = load_module(
    ...     os.path.dirname(__file__) +
    ...     '/assets/tests/generic_config_module.py')
    >>> module_to_dict(mod) == {
    ...     'configval1': 12345,
    ...     'configval2': 'Hello world!',
    ... }
    True
    '''
    contents = {}
    for attr in dir(mod):
        if attr.startswith('_'):
            # Skip private and dunder names.
            continue
        contents[attr] = getattr(mod, attr)
    return contents


def load_config(filename, name='config'):
    '''Load the python file at "filename" as module "name" and return its
    public variables as a dictionary.

    This combines load_module and module_to_dict.

    >>> load_config(
    ...     os.path.dirname(__file__) +
    ...     '/assets/tests/generic_config_module.py'
    ... ) == {
    ...     'configval1': 12345,
    ...     'configval2': 'Hello world!',
    ... }
    True
    '''
    config_module = load_module(filename, name)
    return module_to_dict(config_module)


def extract_authorization(req):
    '''Return the "_auth" attribute of the given request object.

    According to the original note this is the original value from the
    authorization headers.
    '''
    authorization = req._auth
    return authorization


def recursive_u_to_str(obj):
    '''Recursively convert unicode to utf-8 encoded str in Python2; in
    Python3 the input is returned unchanged.

    Dictionaries (keys and values), lists and tuples are rebuilt with their
    contents converted. Any other object is returned as is. This is useful
    when processing JSON values from the database in Zope2, which needs to
    use 8-bit-strings consistently.

    >>> recursive_u_to_str(u'\\xe4\\xf6\\xfc') == 'äöü'
    True
    >>> (recursive_u_to_str(
    ...     [1, 'äöü', u'\\xe4\\xf6\\xfc']) ==
    ...     [1, 'äöü', 'äöü'])
    True
    >>> (recursive_u_to_str(
    ...     (1, 'äöü', u'\\xe4\\xf6\\xfc')) ==
    ...     (1, 'äöü', 'äöü'))
    True
    >>> (recursive_u_to_str(
    ...     {'a': 'äöü', u'b': u'\\xe4\\xf6\\xfc'}) ==
    ...     {'a': 'äöü', 'b': 'äöü'})
    True
    >>> (recursive_u_to_str(
    ...     {
    ...         u'a': [u'b', 'c', u'd', 12],
    ...         u'b': {u'a': u'\\xe4\\xf6\\xfc'},
    ...         u'c': [1, {u'a': u'\\xe4\\xf6\\xfc'}],
    ...     }) ==
    ...     {
    ...         'a': ['b', 'c', 'd', 12],
    ...         'b': {'a': 'äöü'},
    ...         'c': [1, {'a': 'äöü'}],
    ...     })
    True
    >>> recursive_u_to_str(six) == six
    True
    '''
    if six.PY2:
        if isinstance(obj, six.text_type):
            return obj.encode('utf-8')

        if isinstance(obj, dict):
            converted = {}
            for key, value in obj.items():
                converted[recursive_u_to_str(key)] = recursive_u_to_str(value)
            return converted

        if isinstance(obj, list):
            return list(map(recursive_u_to_str, obj))

        if isinstance(obj, tuple):
            return tuple(map(recursive_u_to_str, obj))

    # Python3, or a type that needs no conversion.
    return obj


def re_groupdict(pattern, string):
    """
    Search "string" for the regular expression "pattern". Return the named
    groups of the first match as a dictionary, or None if there is no match.

    >>> re_groupdict('(?P<key>\\\\w+)', 'value')
    {'key': 'value'}

    >>> re_groupdict('start(?P<key>\\\\w+)', 'value')

    >>> re_groupdict('x (?P<key1>\\\\d+) (?P<key2>\\\\d+) y', 'x 123 456 y')==(
    ...     {'key1': '123', 'key2': '456'})
    True

    """
    found = re.search(pattern, string)
    if not found:
        return None
    return found.groupdict()
